pull() the column CurrentJobTitleSelect and get the values of the levels. fct_relevel()
## Parsed with column specification:
## cols(
## .default = col_character(),
## Age = col_double()
## )
## See spec(...) for full column specifications.
| Name | multiple_choice_responses |
| Number of rows | 16716 |
| Number of columns | 47 |
| _______________________ | |
| Column type frequency: | |
| character | 46 |
| numeric | 1 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| LearningPlatformUsefulnessArxiv | 14325 | 0.14 | 10 | 15 | 0 | 3 | 0 |
| LearningPlatformUsefulnessBlogs | 11951 | 0.29 | 10 | 15 | 0 | 3 | 0 |
| LearningPlatformUsefulnessCollege | 13357 | 0.20 | 10 | 15 | 0 | 3 | 0 |
| LearningPlatformUsefulnessCompany | 15735 | 0.06 | 10 | 15 | 0 | 3 | 0 |
| LearningPlatformUsefulnessConferences | 14534 | 0.13 | 10 | 15 | 0 | 3 | 0 |
| LearningPlatformUsefulnessFriends | 15135 | 0.09 | 10 | 15 | 0 | 3 | 0 |
| LearningPlatformUsefulnessKaggle | 10133 | 0.39 | 10 | 15 | 0 | 3 | 0 |
| LearningPlatformUsefulnessNewsletters | 15627 | 0.07 | 10 | 15 | 0 | 3 | 0 |
| LearningPlatformUsefulnessCommunities | 15574 | 0.07 | 10 | 15 | 0 | 3 | 0 |
| LearningPlatformUsefulnessDocumentation | 14395 | 0.14 | 10 | 15 | 0 | 3 | 0 |
| LearningPlatformUsefulnessCourses | 10724 | 0.36 | 10 | 15 | 0 | 3 | 0 |
| LearningPlatformUsefulnessProjects | 11922 | 0.29 | 10 | 15 | 0 | 3 | 0 |
| LearningPlatformUsefulnessPodcasts | 15502 | 0.07 | 10 | 15 | 0 | 3 | 0 |
| LearningPlatformUsefulnessSO | 11076 | 0.34 | 10 | 15 | 0 | 3 | 0 |
| LearningPlatformUsefulnessTextbook | 12535 | 0.25 | 10 | 15 | 0 | 3 | 0 |
| LearningPlatformUsefulnessTradeBook | 16383 | 0.02 | 10 | 15 | 0 | 3 | 0 |
| LearningPlatformUsefulnessTutoring | 15290 | 0.09 | 10 | 15 | 0 | 3 | 0 |
| LearningPlatformUsefulnessYouTube | 11487 | 0.31 | 10 | 15 | 0 | 3 | 0 |
| CurrentJobTitleSelect | 4886 | 0.71 | 5 | 36 | 0 | 16 | 0 |
| MLMethodNextYearSelect | 5883 | 0.65 | 4 | 43 | 0 | 25 | 0 |
| WorkChallengeFrequencyPolitics | 14036 | 0.16 | 5 | 16 | 0 | 4 | 0 |
| WorkChallengeFrequencyUnusedResults | 14972 | 0.10 | 5 | 16 | 0 | 4 | 0 |
| WorkChallengeFrequencyUnusefulInstrumenting | 16077 | 0.04 | 5 | 16 | 0 | 4 | 0 |
| WorkChallengeFrequencyDeployment | 15869 | 0.05 | 5 | 16 | 0 | 4 | 0 |
| WorkChallengeFrequencyDirtyData | 13165 | 0.21 | 5 | 16 | 0 | 4 | 0 |
| WorkChallengeFrequencyExplaining | 15131 | 0.09 | 5 | 16 | 0 | 4 | 0 |
| WorkChallengeFrequencyPass | 16292 | 0.03 | 5 | 16 | 0 | 4 | 0 |
| WorkChallengeFrequencyIntegration | 15744 | 0.06 | 5 | 16 | 0 | 4 | 0 |
| WorkChallengeFrequencyTalent | 13720 | 0.18 | 5 | 16 | 0 | 4 | 0 |
| WorkChallengeFrequencyDataFunds | 15764 | 0.06 | 5 | 16 | 0 | 4 | 0 |
| WorkChallengeFrequencyDomainExpertise | 15308 | 0.08 | 5 | 16 | 0 | 4 | 0 |
| WorkChallengeFrequencyML | 15951 | 0.05 | 5 | 16 | 0 | 4 | 0 |
| WorkChallengeFrequencyTools | 15537 | 0.07 | 5 | 16 | 0 | 4 | 0 |
| WorkChallengeFrequencyExpectations | 15582 | 0.07 | 5 | 16 | 0 | 4 | 0 |
| WorkChallengeFrequencyITCoordination | 15547 | 0.07 | 5 | 16 | 0 | 4 | 0 |
| WorkChallengeFrequencyHiringFunds | 15429 | 0.08 | 5 | 16 | 0 | 4 | 0 |
| WorkChallengeFrequencyPrivacy | 15294 | 0.09 | 5 | 16 | 0 | 4 | 0 |
| WorkChallengeFrequencyScaling | 15883 | 0.05 | 5 | 16 | 0 | 4 | 0 |
| WorkChallengeFrequencyEnvironments | 15463 | 0.07 | 5 | 16 | 0 | 4 | 0 |
| WorkChallengeFrequencyClarity | 14537 | 0.13 | 5 | 16 | 0 | 4 | 0 |
| WorkChallengeFrequencyDataAccess | 14526 | 0.13 | 5 | 16 | 0 | 4 | 0 |
| WorkChallengeFrequencyOtherSelect | 16439 | 0.02 | 5 | 16 | 0 | 4 | 0 |
| WorkInternalVsExternalTools | 9959 | 0.40 | 11 | 45 | 0 | 6 | 0 |
| FormalEducation | 1701 | 0.90 | 15 | 65 | 0 | 7 | 0 |
| DataScienceIdentitySelect | 4045 | 0.76 | 2 | 22 | 0 | 3 | 0 |
| JobSatisfaction | 10039 | 0.40 | 1 | 23 | 0 | 11 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| Age | 331 | 0.98 | 32.37 | 10.47 | 0 | 25 | 30 | 37 | 100 | ▁▇▂▁▁ |
## Filtering for the variable `CurrentJobTitleSelect`, pull the number of levels it has.
# Restrict number_of_levels to the CurrentJobTitleSelect row and extract the
# value with pull(); the call names no column.
pull(
  filter(number_of_levels, variable == "CurrentJobTitleSelect")
)
## [1] 16
pull() the column CurrentJobTitleSelect and get the values of the levels.
## [1] "Business Analyst"
## [2] "Computer Scientist"
## [3] "Data Analyst"
## [4] "Data Miner"
## [5] "Data Scientist"
## [6] "DBA/Database Engineer"
## [7] "Engineer"
## [8] "Machine Learning Engineer"
## [9] "Operations Research Practitioner"
## [10] "Other"
## [11] "Predictive Modeler"
## [12] "Programmer"
## [13] "Researcher"
## [14] "Scientist/Researcher"
## [15] "Software Developer/Software Engineer"
## [16] "Statistician"
# Bar chart of job titles with the levels ordered by frequency (fct_infreq()).
# Rows with a missing CurrentJobTitleSelect are dropped before plotting.
responses_as_factors %>%
  filter(!is.na(CurrentJobTitleSelect)) %>%
  ggplot(aes(x = fct_infreq(CurrentJobTitleSelect))) +
  geom_bar(fill = "orange", color = "black") +
  coord_flip() +
  theme_minimal()

# Same chart with the frequency ordering reversed by fct_rev().
# This is a separate statement: it must start on its own line to parse.
responses_as_factors %>%
  filter(!is.na(CurrentJobTitleSelect)) %>%
  ggplot(aes(x = fct_rev(fct_infreq(CurrentJobTitleSelect)))) +
  geom_bar(fill = "orange", color = "black") +
  coord_flip() +
  theme_minimal()

# Get the levels of WorkInternalVsExternalTools
levels(responses_as_factors$WorkInternalVsExternalTools)
## [1] "Approximately half internal and half external"
## [2] "Do not know"
## [3] "Entirely external"
## [4] "Entirely internal"
## [5] "More external than internal"
## [6] "More internal than external"
# Reorder the levels from internal to external, with "Do not know" last.
responses_as_factors <- responses_as_factors %>%
  mutate(
    WorkInternalVsExternalTools = WorkInternalVsExternalTools %>%
      fct_relevel(
        "Entirely internal",
        "More internal than external",
        "Approximately half internal and half external",
        "More external than internal",
        "Entirely external",
        "Do not know"
      )
  )
## [1] "Entirely internal"
## [2] "More internal than external"
## [3] "Approximately half internal and half external"
## [4] "More external than internal"
## [5] "Entirely external"
## [6] "Do not know"
# Horizontal bar chart of WorkInternalVsExternalTools, using the factor's
# current level order; rows with a missing answer are dropped first.
responses_as_factors %>%
  filter(!is.na(WorkInternalVsExternalTools)) %>%
  ggplot(aes(x = WorkInternalVsExternalTools)) +
  geom_bar() +
  coord_flip()

# fct_relevel()
# NOTE(review): the heading above was fused onto the plot call in the original
# text, which made the line unparseable; it is kept here as a comment.
## [1] "Bachelor's degree"
## [2] "Doctoral degree"
## [3] "I did not complete any formal education past high school"
## [4] "I prefer not to answer"
## [5] "Master's degree"
## [6] "Professional degree"
## [7] "Some college/university study without earning a bachelor's degree"
# Reorder the FormalEducation levels in three successive steps:
#   1. move the "did not complete ... high school" and "Some college ..."
#      levels to the front;
#   2. move "I prefer not to answer" to the end (after = Inf);
#   3. move "Doctoral degree" to the sixth position (after the fifth level).
responses_as_factors <- responses_as_factors %>%
  mutate(
    FormalEducation = FormalEducation %>%
      fct_relevel(
        "I did not complete any formal education past high school",
        "Some college/university study without earning a bachelor's degree"
      ) %>%
      fct_relevel("I prefer not to answer", after = Inf) %>%
      fct_relevel("Doctoral degree", after = 5)
  )
## [1] "I did not complete any formal education past high school"
## [2] "Some college/university study without earning a bachelor's degree"
## [3] "Bachelor's degree"
## [4] "Master's degree"
## [5] "Professional degree"
## [6] "Doctoral degree"
## [7] "I prefer not to answer"
# Bar plot of the frequency of each FormalEducation level.
responses_as_factors %>%
  ggplot(aes(x = FormalEducation)) +
  geom_bar()

# Shorten the two longest level names:
#   "I did not complete any formal education past high school" -> "High school"
#   "Some college/university study without earning a bachelor's degree"
#     -> "Some college"
responses_as_factors <- responses_as_factors %>%
  mutate(
    FormalEducation = fct_recode(
      FormalEducation,
      "High school" = "I did not complete any formal education past high school",
      "Some college" = "Some college/university study without earning a bachelor's degree"
    )
  )

# Plot again with the recoded levels, this time with flipped coordinates.
responses_as_factors %>%
  ggplot(aes(x = FormalEducation)) +
  geom_bar() +
  coord_flip()
## [1] "Business Analyst"
## [2] "Computer Scientist"
## [3] "Data Analyst"
## [4] "Data Miner"
## [5] "Data Scientist"
## [6] "DBA/Database Engineer"
## [7] "Engineer"
## [8] "Machine Learning Engineer"
## [9] "Operations Research Practitioner"
## [10] "Other"
## [11] "Predictive Modeler"
## [12] "Programmer"
## [13] "Researcher"
## [14] "Scientist/Researcher"
## [15] "Software Developer/Software Engineer"
## [16] "Statistician"
Collapse the levels of CurrentJobTitleSelect into a new variable, grouped_titles.
# Collapse CurrentJobTitleSelect into a coarser factor, grouped_titles, and
# count the respondents in each group.
responses_as_factors %>%
  mutate(
    grouped_titles = fct_collapse(
      CurrentJobTitleSelect,
      "Computer Scientist" = c("Programmer", "Software Developer/Software Engineer"),
      "Researcher" = "Scientist/Researcher",
      "Data Analyst/Scientist/Engineer" = c(
        "DBA/Database Engineer", "Data Scientist",
        "Business Analyst", "Data Analyst",
        "Data Miner", "Predictive Modeler"
      )
    )
  ) %>%
  # Put everything that isn't one of those three grouped titles into
  # "Other Title". other_level is set explicitly: without it fct_other() names
  # the catch-all level "Other", which does not match the intended label.
  mutate(
    grouped_titles = fct_other(
      grouped_titles,
      keep = c(
        "Computer Scientist",
        "Researcher",
        "Data Analyst/Scientist/Engineer"
      ),
      other_level = "Other Title"
    )
  ) %>%
  count(grouped_titles)
## Warning: Factor `grouped_titles` contains implicit NA, consider using
## `forcats::fct_explicit_na`
# Lump rare ML methods by proportion.
responses_as_factors %>%
  # remove NAs of MLMethodNextYearSelect
  filter(!is.na(MLMethodNextYearSelect)) %>%
  # create ml_method, which lumps all those with less than 5% of people into "Other"
  mutate(ml_method = fct_lump(MLMethodNextYearSelect, prop = 0.05)) %>%
  # count the frequency of the new variable, sorted in descending order
  count(ml_method, sort = TRUE)

# Lump rare ML methods by rank. This is a separate pipeline: it must start on
# its own line to parse.
responses_as_factors %>%
  # remove NAs
  filter(!is.na(MLMethodNextYearSelect)) %>%
  # create ml_method, retaining the 5 most common methods and renaming others "other method"
  mutate(
    ml_method = fct_lump(MLMethodNextYearSelect, n = 5, other_level = "other method")
  ) %>%
  # count the frequency of the new variable, sorted in descending order
  count(ml_method, sort = TRUE)

# Select only the columns with `LearningPlatformUsefulness` in the name.
# Build a long table with one row per non-missing usefulness answer:
# learning_platform holds the platform name, usefulness holds the answer.
learning_platform_usefulness <-
  select(responses_as_factors, contains("LearningPlatformUsefulness")) %>%
  # Wide to long: the column names go to learning_platform, the cells to usefulness.
  gather(key = learning_platform, value = usefulness) %>%
  # Drop rows where usefulness is NA.
  filter(!is.na(usefulness)) %>%
  # Strip the "LearningPlatformUsefulness" prefix from each platform name.
  mutate(
    learning_platform = str_remove(learning_platform, "LearningPlatformUsefulness")
  )
## Observations: 55,203
## Variables: 2
## $ learning_platform <chr> "Arxiv", "Arxiv", "Arxiv", "Arxiv", "Arxiv", "Arx...
## $ usefulness <chr> "Very useful", "Very useful", "Somewhat useful", ...
# One row per learning_platform / usefulness pair; n is the number of entries
# with that pairing.
count(learning_platform_usefulness, learning_platform, usefulness)

# Add nn, the total number of answers for each learning_platform (the sum of n
# within the platform).
count(learning_platform_usefulness, learning_platform, usefulness) %>%
  add_count(learning_platform, wt = n, name = "nn")

# Add perc, the share of a platform's answers that fall in each usefulness
# category, and save the result as perc_useful_platform. The outer parentheses
# print the result as it is assigned.
(
  perc_useful_platform <-
    count(learning_platform_usefulness, learning_platform, usefulness) %>%
    add_count(learning_platform, wt = n, name = "nn") %>%
    mutate(perc = n / nn)
)

# For each learning platform, plot usefulness on the x-axis against the share
# of responses within that platform on the y-axis (the code below draws bars
# with geom_col(), not lines).
# One panel per learning platform; bars are filled by usefulness category, and
# the x-axis text and ticks are removed.
perc_useful_platform %>%
  ggplot(
    aes(
      x = usefulness,
      y = perc,
      group = learning_platform,
      fill = usefulness
    )
  ) +
  geom_col() +
  facet_wrap(~learning_platform) +
  theme_bw() +
  theme(
    axis.ticks.x = element_blank(),
    axis.text.x = element_blank()
  )

# Using the dataset learning_platform_usefulness, change usefulness to equal 0 if someone answered "Not Useful" and 1 otherwise.
# Preview of the recoding: 0 for "Not Useful", 1 for any other answer.
learning_platform_usefulness %>%
  mutate(usefulness = ifelse(usefulness == "Not Useful", 0, 1))

# Save the per-platform result as a new dataset, usefulness_by_platform.
# This is a separate statement: it must start on its own line to parse.
usefulness_by_platform <- learning_platform_usefulness %>%
  mutate(usefulness = ifelse(usefulness == "Not Useful", 0, 1)) %>%
  # Group the data by each platform.
  group_by(learning_platform) %>%
  # avg_usefulness is the mean of the 0/1 indicator for each platform.
  summarise(avg_usefulness = mean(usefulness))

# Examine the dataset.
usefulness_by_platform

# Create a scatter plot of avg_usefulness by learning_platform, using the dataset you created, usefulness_by_platform.
# Scatter plot of avg_usefulness per learning platform, with bold vertical
# x-axis labels and the y-axis formatted as a percentage.
ggplot(usefulness_by_platform, aes(y = avg_usefulness, x = learning_platform)) +
  geom_point() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, face = "bold")
  ) +
  labs(x = "Learning Platform", y = "Percent finding at least somewhat useful") +
  # Change the y-axis scale to be a percentage
  scale_y_continuous(labels = scales::percent)

# NOTE(review): the stray tokens "2363137 2363314 2362483" followed the plot
# call in the original text and made it unparseable; kept here as a comment.

### Reordering graphs (Ascendingly)
# Order learning_platform in the graph by avg_usefulness so that, from left to
# right, it goes from lowest usefulness to highest (fct_reorder() sorts in
# ascending order here).
usefulness_by_platform %>%
  ggplot(
    aes(
      x = fct_reorder(learning_platform, avg_usefulness),
      y = avg_usefulness
    )
  ) +
  geom_point() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, face = "bold")
  ) +
  labs(x = "Learning Platform", y = "Percent finding at least somewhat useful") +
  # Show the y-axis as a percentage.
  scale_y_continuous(labels = scales::percent)

# Order learning_platform in the graph by avg_usefulness so that, from left to right, it goes from highest usefulness to lowest.
# Same scatter plot with the platform ordering reversed by fct_rev().
usefulness_by_platform %>%
  ggplot(
    aes(
      x = fct_rev(fct_reorder(learning_platform, avg_usefulness)),
      y = avg_usefulness
    )
  ) +
  geom_point() +
  theme(
    axis.text.x = element_text(angle = 90, hjust = 1, vjust = 0.5, face = "bold")
  ) +
  labs(x = "Learning Platform", y = "Percent finding at least somewhat useful") +
  # Show the y-axis as a percentage.
  scale_y_continuous(labels = scales::percent)

# filter for rows where Age is between 10 and 90
# Count respondents per generation, keeping only ages from 10 to 90 inclusive.
responses_as_factors %>%
  filter(Age >= 10 & Age <= 90) %>%
  # Create the generation variable based on age
  mutate(
    generation = case_when(
      between(Age, 10, 22) ~ "Gen Z",
      between(Age, 23, 37) ~ "Gen Y",
      between(Age, 38, 52) ~ "Gen X",
      between(Age, 53, 71) ~ "Baby Boomer",
      between(Age, 72, 90) ~ "Silent"
    )
  ) %>%
  count(generation)

# This is a separate pipeline: it must start on its own line to parse.
responses_as_factors %>%
  # Filter out people who selected Data Scientist as their Job Title
  filter(!(CurrentJobTitleSelect == "Data Scientist"))

# Create a new variable, job_identity, based on their current job title and whether they fully identify as a data scientist.
# Mean job satisfaction per job_identity group, excluding respondents whose
# job title is "Data Scientist".
responses_as_factors %>%
  # Drop people who selected Data Scientist as their job title.
  filter(CurrentJobTitleSelect != "Data Scientist") %>%
  mutate(
    # Cross job title (Data Analyst or not) with data-scientist self-identity.
    # NOTE(review): rows with a missing DataScienceIdentitySelect match none of
    # the first three conditions and take the default label, including rows
    # whose title is "Data Analyst" - confirm this is intended.
    job_identity = case_when(
      CurrentJobTitleSelect == "Data Analyst" &
        DataScienceIdentitySelect == "Yes" ~ "DS analysts",
      CurrentJobTitleSelect == "Data Analyst" &
        DataScienceIdentitySelect %in% c("No", "Sort of (Explain more)") ~ "NDS analyst",
      CurrentJobTitleSelect != "Data Analyst" &
        DataScienceIdentitySelect == "Yes" ~ "DS non-analysts",
      TRUE ~ "NDS non analysts"
    )
  ) %>%
  # Convert the satisfaction answers to numbers; the parsing failures reported
  # below come from this step.
  mutate(JobSatisfaction = parse_number(as.character(JobSatisfaction))) %>%
  group_by(job_identity) %>%
  summarise(avg_js = mean(JobSatisfaction, na.rm = TRUE))
## Warning: 108 parsing failures.
## row col expected actual
## 37 -- a number I prefer not to share
## 115 -- a number I prefer not to share
## 167 -- a number I prefer not to share
## 403 -- a number I prefer not to share
## 427 -- a number I prefer not to share
## ... ... ........ .....................
## See problems(...) for more details.
## Parsed with column specification:
## cols(
## .default = col_character(),
## RespondentID = col_double()
## )
## See spec(...) for full column specifications.
| Name | flying_etiquette |
| Number of rows | 1040 |
| Number of columns | 27 |
| _______________________ | |
| Column type frequency: | |
| character | 26 |
| numeric | 1 |
| ________________________ | |
| Group variables | None |
Variable type: character
| skim_variable | n_missing | complete_rate | min | max | empty | n_unique | whitespace |
|---|---|---|---|---|---|---|---|
| How often do you travel by plane? | 0 | 1.00 | 5 | 21 | 0 | 6 | 0 |
| Do you ever recline your seat when you fly? | 182 | 0.82 | 5 | 19 | 0 | 5 | 0 |
| How tall are you? | 182 | 0.82 | 4 | 14 | 0 | 20 | 0 |
| Do you have any children under 18? | 189 | 0.82 | 2 | 3 | 0 | 2 | 0 |
| In a row of three seats, who should get to use the two arm rests? | 184 | 0.82 | 22 | 59 | 0 | 5 | 0 |
| In a row of two seats, who should get to use the middle arm rest? | 184 | 0.82 | 19 | 44 | 0 | 5 | 0 |
| Who should have control over the window shade? | 184 | 0.82 | 40 | 59 | 0 | 2 | 0 |
| Is itrude to move to an unsold seat on a plane? | 185 | 0.82 | 14 | 19 | 0 | 3 | 0 |
| Generally speaking, is it rude to say more than a few words tothe stranger sitting next to you on a plane? | 185 | 0.82 | 14 | 19 | 0 | 3 | 0 |
| On a 6 hour flight from NYC to LA, how many times is it acceptable to get up if you’re not in an aisle seat? | 185 | 0.82 | 4 | 38 | 0 | 6 | 0 |
| Under normal circumstances, does a person who reclines their seat during a flight have any obligation to the person sitting behind them? | 186 | 0.82 | 72 | 83 | 0 | 2 | 0 |
| Is itrude to recline your seat on a plane? | 186 | 0.82 | 14 | 19 | 0 | 3 | 0 |
| Given the opportunity, would you eliminate the possibility of reclining seats on planes entirely? | 186 | 0.82 | 2 | 3 | 0 | 2 | 0 |
| Is it rude to ask someone to switch seats with you in order to be closer to friends? | 190 | 0.82 | 14 | 19 | 0 | 3 | 0 |
| Is itrude to ask someone to switch seats with you in order to be closer to family? | 190 | 0.82 | 14 | 19 | 0 | 3 | 0 |
| Is it rude to wake a passenger up if you are trying to go to the bathroom? | 190 | 0.82 | 14 | 19 | 0 | 3 | 0 |
| Is itrude to wake a passenger up if you are trying to walk around? | 190 | 0.82 | 14 | 19 | 0 | 3 | 0 |
| In general, is itrude to bring a baby on a plane? | 191 | 0.82 | 14 | 19 | 0 | 3 | 0 |
| In general, is it rude to knowingly bring unruly children on a plane? | 191 | 0.82 | 14 | 19 | 0 | 3 | 0 |
| Have you ever used personal electronics during take off or landing in violation of a flight attendant’s direction? | 191 | 0.82 | 2 | 3 | 0 | 2 | 0 |
| Have you ever smoked a cigarette in an airplane bathroom when it was against the rules? | 191 | 0.82 | 2 | 3 | 0 | 2 | 0 |
| Gender | 33 | 0.97 | 4 | 6 | 0 | 2 | 0 |
| Age | 33 | 0.97 | 4 | 5 | 0 | 4 | 0 |
| Household Income | 214 | 0.79 | 6 | 19 | 0 | 5 | 0 |
| Education | 39 | 0.96 | 15 | 32 | 0 | 5 | 0 |
| Location (Census Region) | 42 | 0.96 | 7 | 18 | 0 | 9 | 0 |
Variable type: numeric
| skim_variable | n_missing | complete_rate | mean | sd | p0 | p25 | p50 | p75 | p100 | hist |
|---|---|---|---|---|---|---|---|---|---|---|
| RespondentID | 0 | 1 | 3432710995 | 610418.3 | 3431729581 | 3432265101 | 3432671861 | 3433152970 | 3436139758 | ▇▇▁▁▁ |
# mutate_if() applies as.factor to every column for which is.character is TRUE.
flying_etiquette %>%
  # Change character variables to factors.
  mutate_if(is.character, as.factor) %>%
  # Drop respondents who answered "Never" to the travel-frequency question.
  filter(`How often do you travel by plane?` != "Never")

# Copy the previous chunk and save your work to gathered_data
# Long table of the "rude" questions: response_var holds the question text
# (the original column name) and value holds the answer.
gathered_data <- flying_etiquette %>%
  # Change character variables to factors.
  mutate_if(is.character, as.factor) %>%
  # Drop respondents who answered "Never" to the travel-frequency question.
  filter(`How often do you travel by plane?` != "Never") %>%
  # Keep the columns whose name contains "rude".
  select(contains("rude")) %>%
  # Change format from wide to long.
  gather("response_var", "value")
## Warning: attributes are not identical across measure variables;
## they will be dropped
## Observations: 7,866
## Variables: 2
## $ response_var <chr> "Is itrude to move to an unsold seat on a plane?", "Is...
## $ value <chr> NA, "No, not rude at all", "No, not rude at all", "No,...
# NOTE(review): the instruction sentences in this section were fused onto the
# code lines in the original text, which made them unparseable. They are kept
# as comments; a leading "..." marks a sentence whose beginning is missing.

# str_remove to remove everything before and including “rude to” (with the
# space at the end) in the response_var column.
gathered_data %>%
  # Remove everything before and including "rude to " (with that space at the end!)
  mutate(response_var = str_remove(response_var, ".*rude to ")) %>%
  # Remove " on a plane"
  mutate(response_var = str_remove(response_var, " on a plane"))

dichotimized_data <- gathered_data %>%
  # Remove everything before and including "rude to " (with that space at the end!)
  mutate(response_var = str_replace(response_var, ".*rude to ", "")) %>%
  # Remove " on a plane"
  mutate(response_var = str_replace(response_var, " on a plane", "")) %>%
  # Remove rows with NA in the value column
  filter(!is.na(value)) %>%
  # Dichotomize the value variable to make a new variable, rude:
  # 0 for either "not rude" answer, 1 for any other answer.
  mutate(
    rude = if_else(
      value %in% c("No, not rude at all", "No, not at all rude"), 0, 1
    )
  )

# ... response_var), and a new column, perc_rude, the mean of the rude column
# for each question.
# ... rude_behaviors and then view your new dataset.
rude_behaviors <- gathered_data %>%
  mutate(response_var = str_replace(response_var, ".*rude to ", "")) %>%
  # The pattern includes the leading space, as in the chunks above, so no
  # stray space is left before the question mark in the plot labels.
  mutate(response_var = str_replace(response_var, " on a plane", "")) %>%
  # Remove rows that are NA in the value column
  filter(!is.na(value)) %>%
  mutate(
    rude = if_else(
      value %in% c("No, not rude at all", "No, not at all rude"), 0, 1
    )
  ) %>%
  # Group by response_var
  group_by(response_var) %>%
  # Create perc_rude, the share of answers considering each behavior rude
  summarise(perc_rude = mean(rude))

# ... response_var by perc_rude
# ... response_var by perc_rude. Save it as initial_plot.
initial_plot <- rude_behaviors %>%
  ggplot(aes(x = fct_reorder(response_var, perc_rude), y = perc_rude)) +
  geom_col()
# View your plot
initial_plot

# ... titled_plot.
titled_plot <- initial_plot +
  labs(
    title = "Hell Is Other People In A Pressurized Metal Tube",
    subtitle = "Percentage of 874 air-passenger respondents who said action is very or somewhat rude",
    caption = "Source: SurveyMonkey Audience",
    x = "",
    y = ""
  )
titled_plot

flipped_plot <- initial_plot +
  coord_flip() +
  # Remove the x-axis ticks and labels
  theme(axis.text.x = element_blank(), axis.ticks.x = element_blank())
flipped_plot

# ... percent() to perc_rude to add a label to each bar with the percent rude
# value, so it will display as “85%”.
# adding labels to bar plots
# percent() is called through the scales namespace, as in the scale_y_continuous()
# calls earlier, so it works even when scales is not attached.
flipped_plot +
  theme_bw() +
  geom_text(
    aes(
      label = scales::percent(perc_rude),
      y = perc_rude + 0.03
    ),
    position = position_dodge(0.9),
    vjust = 1,
    color = "blue",
    fontface = "bold"
  ) +
  labs(y = "% rudeness", x = "Participants' responses") +
  theme(
    text = element_text(family = "serif", colour = "gray25", size = 20, face = "bold")
  )